Достать данные

Данные представляют собой аннотированные слова, записанные от 6 носителей в селении Красный Восток:

ls raw_data
## d23_stress.TextGrid
## d23_stress.wav
## d25_stress.TextGrid
## d25_stress.wav
## d26_stress.TextGrid
## d26_stress.wav
## d27_stress.TextGrid
## d27_stress.wav
## d28_stress.TextGrid
## d28_stress.wav
## d30_stress.TextGrid
## d30_stress.wav

От каждого носителя было записано от 74 до 79 стимульных слов:

library(phonfieldwork)
# Read every TextGrid annotation from raw_data into a single tibble.
textgrids <- read_from_folder("raw_data", "textgrid")

# How many distinct stimulus words (tier 1) were recorded per speaker file?
textgrids %>% 
  filter(tier == 1) %>% 
  group_by(source) %>% 
  summarise(n = n_distinct(content))

Что мы удалили? Ниже приведён список слов, которые были отмечены в слое error и исключены из дальнейшего анализа:

# Build the list of (speaker, word) pairs marked as erroneous in the
# "error" tier of the TextGrids; these tokens are excluded downstream.
textgrids %>% 
  filter(tier_name %in% c("labels", "error")) %>% 
  select(-tier, -id) %>% 
  # one row per interval, with the word label and the error mark as columns
  pivot_wider(names_from = tier_name, values_from = content) %>% 
  # fixed(): the dot in ".TextGrid" is a literal, not a regex wildcard
  mutate(speaker = str_remove(source, fixed(".TextGrid")),
         word = labels) %>% 
  # keep only tokens that actually carry a non-empty error annotation
  filter(!is.na(error), error != "") %>% 
  select(speaker, word) ->
  to_remove
to_remove

Дальше следует описание данных, см. Appendix

# Oscillogram + spectrogram of one recording with its TextGrid annotation;
# the first 10.6 s are shown and zoom = c(3.8, 4.4) magnifies a single
# stimulus word inside that window.
draw_sound(file_name = "raw_data/d23_stress.wav", 
           annotation = "raw_data/d23_stress.TextGrid",
           from = 0,
           to = 10.6,
           zoom = c(3.8, 4.4))

После этого мы применяем к этим данным вот этот скрипт:

cat get_pitch_intencity.praat
## # This is a Praat script made for investigation of Abaza vowels. It analyses multiple selected sounds 
## # (TextGrids should be also uploaded to Praat Obects). The file should have the following structure:
## # * first tier --- word label
## # * second tier --- translation label
## # * third tier --- sound label
## # * fourth tier --- utterance label
## 
## 
## # This script is distributed under the GNU General Public License.
## # George Moroz 09.05.2022
## 
## form Get Pitch listing from a file
##   comment Where should the script write a result file
##   text directory /home/agricolamz/for_work/HSE/students/2022_Kuznetsova/data/
##   comment How should the script name a result file
##   text resultfile log.txt
##   comment Time step
##   real step 0.01
##   comment Pitch floor (Hz)
##   integer floor 90
##   comment Pitch ceiling (Hz)
##   integer ceiling 250
##   comment 5. formant ceiling (Hz)
##   integer fceiling 5500
##   comment Minimum pitch for intensity (Hz)
##   integer mpitch 200  
## endform
## 
## n = numberOfSelected("Sound")
## for j to n
##  sound[j] = selected("Sound", j)
## endfor
## for k to n
##  selectObject: sound[k]
##  object_name$ = selected$ ("Sound")
##  select TextGrid 'object_name$'
##  number_of_intervals = Get number of intervals... 3
##          for b from 1 to number_of_intervals
##              select TextGrid 'object_name$'
##              interval_label$ = Get label of interval... 3 'b'
##              utterance$ = Get label of interval... 4 'b'
##              if interval_label$ <> ""
##                  start = Get starting point... 3 'b'
##                  end = Get end point... 3 'b'
##                              duration = end - start
##                  int_1 = Get interval at time... 1 end
##                  word$ = Get label of interval... 1 int_1
##                  trans$ = Get label of interval... 2 int_1
##                  select Sound 'object_name$'
##                  s = Extract part: start, end, "rectangular", 1, "yes"
##                  select s
##                  fragment_name$ = selected$ ("Sound")
##                  pitch = To Pitch... step floor ceiling
##                  selectObject: s
##                  formant = To Formant (burg): 0, 5, fceiling, 0.025, 50
##                     selectObject: s
##                     intensity = To Intensity: mpitch, 0, "no"
##                  i = start
##                  while i <= end
##                      select Pitch 'fragment_name$'
##                      f0 = Get value at time... 'i' Hertz Linear
##                      select Formant 'fragment_name$'
##                      f1 = Get value at time: 1, i, "Hertz", "Linear"
##                      f2 = Get value at time: 2, i, "Hertz", "Linear"
##                      f3 = Get value at time: 3, i, "Hertz", "Linear"
##                                  select Intensity 'fragment_name$'
##                                  intvalue = Get value at time: 'i', "cubic"
##                      i = i + 0.01
##                      fileappend "'directory$''resultfile$'" 'object_name$''tab$''interval_label$''tab$''utterance$''tab$''word$''tab$''trans$''tab$''f0''tab$''f1''tab$''f2''tab$''f3''tab$''intvalue''tab$''duration''tab$''i''newline$'
##                  endwhile
##                  removeObject: s
##                  removeObject: pitch
##                  removeObject: formant
##                  removeObject: intensity
##              endif
##          endfor
## #    removeObject: "Sound 'object_name$'"
## #    removeObject: "TextGrid 'object_name$'"
## endfor

Скрипт идет по аннотации с шагом в 10 мс (0,01 с) и с базовыми настройками, отраженными в меню (между form и endform), берет значения длительности, f0, f1, f2, f3 и интенсивности. Получается вот такая вот таблица:

# Read the Praat log. The file has no header row, so supply the column
# names to read_tsv() directly instead of patching colnames() afterwards.
df <- read_tsv("data/log.txt",
               col_names = c("speaker", "vowel", "utterance", "word",
                             "translation", "f0", "f1", "f2", "f3",
                             "intensity", "duration", "step"))
df

Дальше с данными можно делать разное, я просто возьму среднее по всем параметрам и создам переменную minimal_pair, которая будет включать минимальную пару:

# Average the acoustic measures per vowel token and attach the minimal-pair
# label. Praat writes "--undefined--" for missing measurements, so those
# strings are converted to NA first.
df %>% 
  # across(everything(), ...) replaces the superseded mutate_all()
  mutate(across(everything(),
                function(i) str_replace(i, "--undefined--", NA_character_))) %>%
  mutate(across(f0:duration, as.double),
         # a capitalized vowel label marks the stressed syllable
         stressed = ifelse(vowel == toupper(vowel), "stressed", "unstressed"),
         stressed = factor(stressed, levels = c("unstressed", "stressed")),
         # "V1"/"V2" -> syllable number; "u1".."u4" -> utterance number
         vowel_n = str_extract(utterance, "V\\d"),
         vowel_n = as.double(str_remove(vowel_n, "V")),
         utterance = str_extract(utterance, "u\\d"),
         utterance = as.double(str_remove(utterance, "u"))) %>% 
  # keep only /a/ vowels from the first four utterances
  filter(str_detect(vowel, "[аА]"),
         utterance <= 4) %>% 
  select(-step) %>% 
  # drop tokens annotated as erroneous; spell the join keys out so
  # anti_join() does not have to guess them from common column names
  anti_join(to_remove, by = c("speaker", "word")) %>% 
  group_by(speaker, utterance, stressed, word, vowel_n) %>% 
  summarise(f0 = mean(f0, na.rm = TRUE),
            f1 = mean(f1, na.rm = TRUE),
            f2 = mean(f2, na.rm = TRUE),
            f3 = mean(f3, na.rm = TRUE),
            intensity = mean(intensity, na.rm = TRUE),
            duration = mean(duration, na.rm = TRUE),
            # make the default post-summarise grouping explicit (no message)
            .groups = "drop_last") %>% 
  mutate(vowel_n = str_c(vowel_n, ". syllable"),
         utterance = str_c(utterance, ". utterance"),
         duration = duration*1000) %>%  # seconds -> milliseconds
  ungroup() %>% 
  # pairs share a lowercase form; use that as the pair id
  mutate(word_pair = as.double(factor(tolower(word)))) %>% 
  group_by(word_pair) %>% 
  # label each pair with both of its word forms, e.g. "word_WORD"
  mutate(minimal_pair = str_c(unique(str_c(word, "_")), collapse = ""),
         minimal_pair = str_remove(minimal_pair, "_$")) %>% 
  ungroup() ->
  mean_values
mean_values

Вы спрашивали, какие минимальные пары вошли в анализ?

# Export the speaker-by-pair table of minimal pairs for manual inspection.
pairs_by_speaker <- mean_values %>% 
  distinct(speaker, word_pair, minimal_pair) %>% 
  pivot_wider(names_from = speaker, values_from = minimal_pair) %>% 
  select(-word_pair)
write_csv(pairs_by_speaker, "~/Desktop/pairs.csv")

График с разницами между минимальными парами (ударный слог минус безударный). Если разницы нет, то горб должен возвышаться над 0. Если же он от нуля смещен, то значит разница между ударными и безударными слогами есть.

# The three difference plots below shared one copy-pasted pipeline;
# factor it into a single helper. `value` is the (unquoted) acoustic
# measure whose stressed-minus-unstressed difference is plotted.
plot_stress_difference <- function(data, value) {
  # keep the old per-measure axis labels, e.g. "duration_difference"
  x_label <- paste0(rlang::as_label(rlang::enquo(value)), "_difference")
  data %>% 
    select(speaker, utterance, stressed, minimal_pair, vowel_n, {{ value }}) %>% 
    pivot_wider(names_from = stressed, values_from = {{ value }}) %>% 
    mutate(difference = stressed - unstressed) %>% 
    ggplot(aes(difference, fill = vowel_n))+
    geom_density(alpha = 0.4)+
    facet_grid(speaker~utterance, scales = "free")+
    labs(x = x_label)
}

plot_stress_difference(mean_values, duration)

plot_stress_difference(mean_values, f0)

plot_stress_difference(mean_values, intensity)

Статистика

Я использую байесовскую логистическую регрессию со смешанными эффектами с априорными распределениями по умолчанию; формула выглядит так:

stressed ~ ПЕРЕМЕННАЯ * vowel_n + (1|speaker) + (vowel_n + 1|minimal_pair/utterance)

Получается взаимодействие переменной и номера гласного в слове; носитель входит как случайный свободный член, а номер произнесения, вложенный в минимальную пару, — как случайный эффект со случайным угловым коэффициентом по номеру гласного.

library(brms)
# Bayesian mixed-effects logistic regression: does vowel duration predict
# whether the syllable is stressed? Random intercept per speaker; random
# intercept and vowel_n slope for utterance nested in minimal pair.
fit_duration <- mean_values %>% 
  select(speaker, utterance, stressed, minimal_pair, vowel_n, duration) %>% 
  brm(stressed ~ duration*vowel_n + (1|speaker) + (vowel_n+1|minimal_pair/utterance),
      family = bernoulli(),
      data = .)
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.000905 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 9.05 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 95.9162 seconds (Warm-up)
## Chain 1:                36.8241 seconds (Sampling)
## Chain 1:                132.74 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0.00055 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 5.5 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 98.5351 seconds (Warm-up)
## Chain 2:                35.4575 seconds (Sampling)
## Chain 2:                133.993 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0.000611 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 6.11 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 91.2953 seconds (Warm-up)
## Chain 3:                38.3185 seconds (Sampling)
## Chain 3:                129.614 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0.000573 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 5.73 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 95.6344 seconds (Warm-up)
## Chain 4:                36.0296 seconds (Sampling)
## Chain 4:                131.664 seconds (Total)
## Chain 4:
# Posterior summary of the duration model.
fit_duration
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: stressed ~ duration * vowel_n + (1 | speaker) + (vowel_n + 1 | minimal_pair/utterance) 
##    Data: . (Number of observations: 3398) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~minimal_pair (Number of levels: 40) 
##                                  Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept)                        1.30      0.19     0.99     1.72 1.00
## sd(vowel_n2.syllable)                1.21      0.20     0.86     1.63 1.00
## cor(Intercept,vowel_n2.syllable)    -0.98      0.02    -1.00    -0.93 1.00
##                                  Bulk_ESS Tail_ESS
## sd(Intercept)                         857     1536
## sd(vowel_n2.syllable)                1007     1823
## cor(Intercept,vowel_n2.syllable)     1296     1624
## 
## ~minimal_pair:utterance (Number of levels: 160) 
##                                  Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept)                        0.35      0.12     0.09     0.57 1.01
## sd(vowel_n2.syllable)                0.14      0.11     0.00     0.40 1.00
## cor(Intercept,vowel_n2.syllable)     0.18      0.55    -0.90     0.96 1.00
##                                  Bulk_ESS Tail_ESS
## sd(Intercept)                         710      616
## sd(vowel_n2.syllable)                 760     1115
## cor(Intercept,vowel_n2.syllable)     2379     2051
## 
## ~speaker (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     1.57      0.56     0.85     3.01 1.00     1438     2136
## 
## Population-Level Effects: 
##                            Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                     -8.64      0.77   -10.17    -7.15 1.00      897
## duration                       0.06      0.00     0.05     0.06 1.00     2906
## vowel_n2.syllable              1.33      0.47     0.42     2.23 1.00     1913
## duration:vowel_n2.syllable    -0.02      0.00    -0.03    -0.01 1.00     2952
##                            Tail_ESS
## Intercept                      1729
## duration                       3207
## vowel_n2.syllable              2272
## duration:vowel_n2.syllable     2871
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace plots and marginal posterior densities (convergence diagnostics).
plot(fit_duration)

# Marginal effect of duration on P(stressed), split by syllable number.
conditional_effects(fit_duration,
                    effects = c("duration:vowel_n"))

# Same model structure as fit_duration, with f0 as the predictor.
fit_f0 <- mean_values %>% 
  select(speaker, utterance, stressed, minimal_pair, vowel_n, f0) %>% 
  brm(stressed ~ f0*vowel_n + (1|speaker) + (vowel_n+1|minimal_pair/utterance),
      family = bernoulli(),
      data = .)
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.000511 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 5.11 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 89.1885 seconds (Warm-up)
## Chain 1:                35.5903 seconds (Sampling)
## Chain 1:                124.779 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0.000761 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 7.61 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 103.23 seconds (Warm-up)
## Chain 2:                37.8262 seconds (Sampling)
## Chain 2:                141.056 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0.000514 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 5.14 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 92.4354 seconds (Warm-up)
## Chain 3:                36.1146 seconds (Sampling)
## Chain 3:                128.55 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0.000548 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 5.48 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 93.83 seconds (Warm-up)
## Chain 4:                38.9794 seconds (Sampling)
## Chain 4:                132.809 seconds (Total)
## Chain 4:
# Posterior summary of the f0 model.
fit_f0
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: stressed ~ f0 * vowel_n + (1 | speaker) + (vowel_n + 1 | minimal_pair/utterance) 
##    Data: . (Number of observations: 3367) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~minimal_pair (Number of levels: 40) 
##                                  Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept)                        0.03      0.03     0.00     0.10 1.00
## sd(vowel_n2.syllable)                0.05      0.04     0.00     0.16 1.00
## cor(Intercept,vowel_n2.syllable)    -0.15      0.58    -0.97     0.94 1.00
##                                  Bulk_ESS Tail_ESS
## sd(Intercept)                        3459     2620
## sd(vowel_n2.syllable)                3696     2419
## cor(Intercept,vowel_n2.syllable)     7496     2685
## 
## ~minimal_pair:utterance (Number of levels: 160) 
##                                  Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept)                        0.03      0.03     0.00     0.10 1.00
## sd(vowel_n2.syllable)                0.06      0.04     0.00     0.16 1.00
## cor(Intercept,vowel_n2.syllable)    -0.15      0.58    -0.97     0.92 1.00
##                                  Bulk_ESS Tail_ESS
## sd(Intercept)                        3845     1967
## sd(vowel_n2.syllable)                2768     2486
## cor(Intercept,vowel_n2.syllable)     5900     2176
## 
## ~speaker (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.17      0.12     0.02     0.45 1.00     1294     1381
## 
## Population-Level Effects: 
##                      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                0.22      0.32    -0.41     0.86 1.00     4772
## f0                      -0.00      0.00    -0.00     0.00 1.00     5198
## vowel_n2.syllable       -5.50      0.47    -6.45    -4.58 1.00     7666
## f0:vowel_n2.syllable     0.03      0.00     0.03     0.04 1.00     7441
##                      Tail_ESS
## Intercept                3116
## f0                       3531
## vowel_n2.syllable        2685
## f0:vowel_n2.syllable     2696
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace plots and marginal posterior densities (convergence diagnostics).
plot(fit_f0)

# Marginal effect of f0 on P(stressed), split by syllable number.
conditional_effects(fit_f0,
                    effects = c("f0:vowel_n"))

# Same model structure as fit_duration, with intensity as the predictor.
fit_intensity <- mean_values %>% 
  select(speaker, utterance, stressed, minimal_pair, vowel_n, intensity) %>% 
  brm(stressed ~ intensity*vowel_n + (1|speaker) + (vowel_n+1|minimal_pair/utterance),
      family = bernoulli(),
      data = .)
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.000645 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 6.45 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 150.928 seconds (Warm-up)
## Chain 1:                153.712 seconds (Sampling)
## Chain 1:                304.64 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0.000532 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 5.32 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 162.35 seconds (Warm-up)
## Chain 2:                160.297 seconds (Sampling)
## Chain 2:                322.647 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0.000513 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 5.13 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 158.589 seconds (Warm-up)
## Chain 3:                161.77 seconds (Sampling)
## Chain 3:                320.359 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'e67f1fb651efe88012e69c4f524f5c99' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0.00055 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 5.5 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 166.32 seconds (Warm-up)
## Chain 4:                152.625 seconds (Sampling)
## Chain 4:                318.945 seconds (Total)
## Chain 4:
# Posterior summary of the intensity model.
fit_intensity
##  Family: bernoulli 
##   Links: mu = logit 
## Formula: stressed ~ intensity * vowel_n + (1 | speaker) + (vowel_n + 1 | minimal_pair/utterance) 
##    Data: . (Number of observations: 3398) 
##   Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
##          total post-warmup draws = 4000
## 
## Group-Level Effects: 
## ~minimal_pair (Number of levels: 40) 
##                                  Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept)                        0.04      0.03     0.00     0.12 1.00
## sd(vowel_n2.syllable)                0.07      0.06     0.00     0.21 1.00
## cor(Intercept,vowel_n2.syllable)    -0.10      0.57    -0.96     0.92 1.00
##                                  Bulk_ESS Tail_ESS
## sd(Intercept)                        3202     2367
## sd(vowel_n2.syllable)                2826     2500
## cor(Intercept,vowel_n2.syllable)     5499     3215
## 
## ~minimal_pair:utterance (Number of levels: 160) 
##                                  Estimate Est.Error l-95% CI u-95% CI Rhat
## sd(Intercept)                        0.04      0.03     0.00     0.11 1.00
## sd(vowel_n2.syllable)                0.08      0.06     0.00     0.23 1.00
## cor(Intercept,vowel_n2.syllable)    -0.09      0.57    -0.95     0.94 1.00
##                                  Bulk_ESS Tail_ESS
## sd(Intercept)                        3933     2472
## sd(vowel_n2.syllable)                2721     2373
## cor(Intercept,vowel_n2.syllable)     5725     3244
## 
## ~speaker (Number of levels: 6) 
##               Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## sd(Intercept)     0.48      0.24     0.22     1.09 1.00     1463     1932
## 
## Population-Level Effects: 
##                             Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
## Intercept                     -13.46      2.28   -17.93    -9.09 1.00     3567
## intensity                       0.20      0.03     0.14     0.27 1.00     3601
## vowel_n2.syllable             -13.24      2.61   -18.37    -7.98 1.00     3890
## intensity:vowel_n2.syllable     0.22      0.04     0.14     0.30 1.00     3908
##                             Tail_ESS
## Intercept                       3311
## intensity                       3155
## vowel_n2.syllable               3107
## intensity:vowel_n2.syllable     3080
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
# Trace plots and marginal posterior densities (convergence diagnostics).
plot(fit_intensity)

# Marginal effect of intensity on P(stressed), split by syllable number.
conditional_effects(fit_intensity,
                    effects = c("intensity:vowel_n"))

Appendix: list of stimuli

# Appendix table: every stimulus word together with its speaker code (dNN).
textgrids %>% 
  filter(tier == 1) %>% 
  distinct(content, source) %>% 
  transmute(word = content,
            speaker = str_extract(source, "d\\d\\d"))

Нужно в IPA перевести… https://github.com/agricolamz/abaza_cyrillic_to_trans